rm(list = ls())
library(stargazer)
library(ggplot2)
library(lmtest)
library(sandwich)
library(gmodels)
library(dplyr)
library(lazyeval)
library(tidyr)
library(fastDummies)
library(haven)

Data <- read_dta("NORC_Data.dta")

NORC_Data <- Data  %>%
  as_tibble()

get_er_standard <- function(input_data, x_var){
  require("lazyeval")
  rel_column <- input_data  %>%
    select((x_var))  %>%
    setNames("xvariable")
  
  er_xvar <- 0.36*sd(rel_column$xvariable)
  return(er_xvar)
}

get_equiv_test <- function(input_data, x_var){
  require("lazyeval")
  
  
  #Function Testing
  #input_data <- NORC_Data
  #x_var <- "female"
  
  #Removing Observations with misisng values for x_var
  input_data <- drop_na(input_data, x_var)
  
  er_xvar <- get_er_standard(input_data, x_var)
    
  treatment_group <- input_data %>%
    select(treatment, (x_var)) %>%
    filter(treatment == 1) %>%
    select((x_var))
  
  control_group <- input_data %>%
    select(treatment, (x_var)) %>%
    filter(treatment == 0) %>% 
    select((x_var))
  
  m <- nrow(treatment_group)
  n <- nrow(control_group)
  
  mean_treatment_xvar <- colMeans(treatment_group)
  mean_control_xvar <- colMeans(control_group)
  
  ssd_treatment_xvar <- sum((treatment_group - mean_treatment_xvar)^2)
  ssd_control_xvar <- sum((control_group - mean_control_xvar)^2)
  
  T_num <- sqrt(m*n*(m+n-2)/(m+n))*(mean_treatment_xvar-mean_control_xvar)
  T_denom <- sqrt(ssd_treatment_xvar+ssd_control_xvar)
  T_xvar <- T_num/T_denom 
  
  ncp_xvar <- (m*n*er_xvar^2)/(m+n)
  df_2_xvar <- (m+n-2)
  
  
  p_value <- pf((T_xvar)^2, 1, df_2_xvar, ncp_xvar, lower.tail = TRUE, log.p = FALSE)
  
  quantile_ncF <- qf(0.05, 1, df_2_xvar, ncp_xvar, lower.tail = TRUE, log.p = FALSE)
  critval_ncF <- sqrt(quantile_ncF)
  
  rejection_outcome <- abs(T_xvar) <= critval_ncF
  
  T_output <- list(mean_treatment_xvar, mean_control_xvar, er_xvar, p_value)
  names(T_output) <- c("Mean Treament", "Mean Control", "Epsilon Value", "Equiv. Test p-value")
  T_output <- do.call(cbind.data.frame, T_output)
  return(T_output)
}

#Creating Dummies
NORC_Data <- dummy_cols(NORC_Data, select_columns=c("age_4_cat", "educ_4_cat", "race_cat", "employment_status", "census_4_cat", "survey_mode", "hh_inc_5_cat", "wave_cat", "rep_pre_treatment"))

T_output_female <- get_equiv_test(NORC_Data, "female")

T_output_age_4_cat_1 <- get_equiv_test(NORC_Data, "age_4_cat_1")
T_output_age_4_cat_2 <- get_equiv_test(NORC_Data, "age_4_cat_2")
T_output_age_4_cat_3 <- get_equiv_test(NORC_Data, "age_4_cat_3")
T_output_age_4_cat_4 <- get_equiv_test(NORC_Data, "age_4_cat_4")

T_output_educ_4_cat_1 <- get_equiv_test(NORC_Data, "educ_4_cat_1")
T_output_educ_4_cat_2 <- get_equiv_test(NORC_Data, "educ_4_cat_2")
T_output_educ_4_cat_3 <- get_equiv_test(NORC_Data, "educ_4_cat_3")
T_output_educ_4_cat_4 <- get_equiv_test(NORC_Data, "educ_4_cat_4")

T_output_race_cat_1 <- get_equiv_test(NORC_Data, "race_cat_1")
T_output_race_cat_2 <- get_equiv_test(NORC_Data, "race_cat_2")
T_output_race_cat_3 <- get_equiv_test(NORC_Data, "race_cat_3")
T_output_race_cat_4 <- get_equiv_test(NORC_Data, "race_cat_4")
T_output_race_cat_5 <- get_equiv_test(NORC_Data, "race_cat_5")
T_output_race_cat_6 <- get_equiv_test(NORC_Data, "race_cat_6")

T_output_employment_status_1 <- get_equiv_test(NORC_Data, "employment_status_1")
T_output_employment_status_2 <- get_equiv_test(NORC_Data, "employment_status_2")
T_output_employment_status_3 <- get_equiv_test(NORC_Data, "employment_status_3")
T_output_employment_status_4 <- get_equiv_test(NORC_Data, "employment_status_4")
T_output_employment_status_5 <- get_equiv_test(NORC_Data, "employment_status_5")
T_output_employment_status_6 <- get_equiv_test(NORC_Data, "employment_status_6")
T_output_employment_status_7 <- get_equiv_test(NORC_Data, "employment_status_7")

T_output_census_4_cat_1 <- get_equiv_test(NORC_Data, "census_4_cat_1")
T_output_census_4_cat_2 <- get_equiv_test(NORC_Data, "census_4_cat_2")
T_output_census_4_cat_3 <- get_equiv_test(NORC_Data, "census_4_cat_3")
T_output_census_4_cat_4 <- get_equiv_test(NORC_Data, "census_4_cat_4")

T_output_survey_mode_1 <- get_equiv_test(NORC_Data, "survey_mode_1")

T_output_hh_inc_5_cat_1 <- get_equiv_test(NORC_Data, "hh_inc_5_cat_1")
T_output_hh_inc_5_cat_2 <- get_equiv_test(NORC_Data, "hh_inc_5_cat_2")
T_output_hh_inc_5_cat_3 <- get_equiv_test(NORC_Data, "hh_inc_5_cat_3")
T_output_hh_inc_5_cat_4 <- get_equiv_test(NORC_Data, "hh_inc_5_cat_4")
T_output_hh_inc_5_cat_5 <- get_equiv_test(NORC_Data, "hh_inc_5_cat_5")

T_output_wave_cat_1 <- get_equiv_test(NORC_Data, "wave_cat_1")
T_output_wave_cat_2 <- get_equiv_test(NORC_Data, "wave_cat_2")
T_output_wave_cat_3 <- get_equiv_test(NORC_Data, "wave_cat_3")

T_output_pt_rep_1 <- get_equiv_test(NORC_Data, "rep_pre_treatment_1")


NORC_Data <- NORC_Data %>%
  mutate(nonresp_rep = as.numeric(is.na(rep_response)) ) %>%
  mutate(nonresp_app = as.numeric(is.na(app_response)) ) 
  


bal_table_output <- rbind(T_output_female, T_output_age_4_cat_1, T_output_age_4_cat_2, T_output_age_4_cat_3, T_output_age_4_cat_4, T_output_educ_4_cat_1, T_output_educ_4_cat_2, T_output_educ_4_cat_3, T_output_educ_4_cat_4)
bal_table_output <- rbind(bal_table_output, T_output_race_cat_1, T_output_race_cat_2, T_output_race_cat_3, T_output_race_cat_4, T_output_race_cat_5, T_output_race_cat_6, 
                          T_output_employment_status_1, T_output_employment_status_2, T_output_employment_status_3, T_output_employment_status_4, T_output_employment_status_5, T_output_employment_status_6, T_output_employment_status_7,                        
                          T_output_census_4_cat_1, T_output_census_4_cat_2, T_output_census_4_cat_3, T_output_census_4_cat_4, T_output_survey_mode_1, T_output_hh_inc_5_cat_1, T_output_hh_inc_5_cat_2, T_output_hh_inc_5_cat_3, T_output_hh_inc_5_cat_4,
                          T_output_hh_inc_5_cat_5, T_output_wave_cat_1, T_output_wave_cat_2, T_output_wave_cat_3, T_output_pt_rep_1)

rownames(bal_table_output) <- c("Female", "Ages 18-29", "Ages 30-44", "Ages 45-59", "Ages 60+", "Less than HS", "HS", "Some college", "College grad or more", "White", "Black", "Other", "Hispanic", "2+ Races", "Asian/PI", 
                                "Working-Employee", "Working-Self-Employed", "Not Working-Temp. Layoff", "Not Working-Looking", "Not Working-Retired", "Not Working-Disabled", "Not Working-Other",
                                "Northeast", "Midwest", "South", "West", "Online Interview", "<$15,000", "$15,000-29,999", "$30,000-59,999", "$60,000-124,999", ">=$125,000", "Month 1", "Month 2", "Month 3", "Pre-Treatment Republican")


table_output <- stargazer(bal_table_output, summary = FALSE, label = "BalanceTest", type = "latex", title = "Balance Tests")
cat(table_output, sep = '\n', file = 'Output/Balance_Table.tex')